import re
from collections import Counter, defaultdict
import csv
from itertools import islice
import os
from sortedcontainers import SortedDict

#filename="./text/30_asean_summit.pdf.txt"
filedir = "./text/"

# Every word from every .txt file, in the order the files are listed.
# Duplicates are kept on purpose: the combined Counter built from this list
# further down relies on them to get the true frequencies.
countdict = []

################### First pass: collect the words of every .txt file in filedir
for txt in os.listdir(filedir):
	# endswith() rather than split(".")[-1]: an entry named exactly "txt"
	# (no dot at all) must not be mistaken for a text file.
	if not txt.endswith(".txt"):
		continue

	filename = os.path.join(filedir, txt)
	# The context manager closes the handle as soon as the text is read, so a
	# directory with many files does not accumulate open descriptors.
	with open(filename, encoding="utf8") as file:
		message = file.read()

	# A word is a maximal run of word characters other than "_"
	# ([^\W_] is \w with the underscore removed).
	words = re.findall(r"[^\W_]+", message, re.MULTILINE)
	print(len(words))
	countdict.extend(words)

	# Per-file statistics: total words above, unique words here. The unique
	# count is printed twice so the script's stdout stays exactly as before.
	count_words = Counter(words)
	print(len(count_words))
	print(len(count_words))






############### Combined word count of all files, written as the first header/row pair of the CSV #########

count_dict_words = Counter(countdict)
full_list = [{k: v} for k, v in count_dict_words.items()]
print(full_list)

csv_filename = "./word_count.csv"


def _write_count_row(path, counts, mode):
	"""Write one header line (the words) and one data line (their counts).

	Args:
		path: CSV file to write to.
		counts: mapping of word -> count; its iteration order becomes the
			column order.
		mode: "w" to start a fresh file, "a" to append to an existing one.

	The file is opened with newline="" as the csv module requires (otherwise
	rows end in "\\r\\r\\n" on Windows) and with an explicit UTF-8 encoding,
	matching how the input files are read, so non-ASCII words cannot fail
	under a non-UTF-8 locale.
	"""
	with open(path, mode=mode, newline="", encoding="utf8") as csv_file:
		writer = csv.DictWriter(csv_file, list(counts))
		writer.writeheader()
		writer.writerow(counts)


_write_count_row(csv_filename, count_dict_words, "w")

# Layout note: the CSV is "horizontal" - each block is a header line of words
# followed by a single line of counts. A "vertical" layout (one word,count
# pair per row via csv.writer) was tried previously and is not used.

############## Second pass: for each text file, count its words, sort them and append that header/row pair to the CSV
for txt in os.listdir(filedir):
	# endswith() rather than split(".")[-1]: an entry named exactly "txt"
	# (no dot at all) must not be mistaken for a text file.
	if not txt.endswith(".txt"):
		continue

	filename = os.path.join(filedir, txt)
	# Close the input as soon as it has been read instead of leaking the handle.
	with open(filename, encoding="utf8") as file:
		message = file.read()

	# A word is a maximal run of word characters other than "_".
	words = re.findall(r"[^\W_]+", message, re.MULTILINE)
	print(len(words))

	# NOTE(review): the first pass already put these words into countdict, so
	# after this loop the list holds every word twice. It no longer affects
	# count_dict_words (computed above); kept only to preserve the
	# module-level state - drop it if nothing else reads countdict.
	countdict.extend(words)

	count_words = Counter(words)
	# Unique-word count, printed twice so the script's stdout stays unchanged.
	print(len(count_words))
	print(len(count_words))

	# SortedDict orders the columns alphabetically by word for this file.
	sortedlist = SortedDict(count_words)
	_write_count_row(csv_filename, sortedlist, "a")


